# Dickstein, Ho, and Mark (2023)
# This code creates a household-option level dataset in the format used
# in the counterfactual exercises.

# * # * # * # * # * # * #
# PRELIMINARIES         #
# * # * # * # * # * # * #
try(library(fastDummies))
# Path to the counterfactuals analysis folder under the project root.
counterfactual_folder <- file.path(project_folder, "analysis/counterfactuals")

# Load the definition of the function that creates the household-option level
# dataset (source() only defines it; it is not run at this point).
source(file.path(project_folder, "library/DA01QuanModifyExplodedDFunc.R"))

# * # * # * # * # * # * #
# LOADING DATA          #
# * # * # * # * # * # * #

## Load the household-level dataset, which includes the uninsured households.
SubDat <- fread(paste0(project_folder, "/data/counterfactual_all_subscribers.csv"))

# Add a sequential household id (one per row). seq_len(.N) is used instead of
# 1:.N so that an empty table gets a zero-length id rather than c(1, 0).
SubDat[, hhid := seq_len(.N)]
# Report how many rows were loaded; the literals carry the spaces around the
# count so the message reads as a sentence.
print(paste0("There are ", nrow(SubDat), " subscriber observations when loaded."))
# best_guess_sumrate is the sum of best_guess_frate and best_guess_srate.
SubDat[, best_guess_sumrate := best_guess_frate + best_guess_srate]

## Load the premium dataset, reading only the columns listed here.
vars_to_keep_prems <- c(
  "subscriberid", "year", "constructed_plan_year",
  "PAYER_ID", "MNC",
  "grossprem", "best_guess_Subsidy", "best_guess_AV", "plan_AV",
  "best_guess_netprem"
)
all_choice_premiums <- fread(
  paste0(project_folder, "/data/AllChoicePremiumsPostMerge.csv"),
  select = vars_to_keep_prems
)

# Keep rows for years 2014-2016 whose constructed_plan_year has "2", "3", or
# "4" as its 8th character.
all_choice_premiums <- all_choice_premiums[
  year %in% 2014:2016 & substr(constructed_plan_year, 8, 8) %in% 2:4
]

# NOTE(review): is.identified() is defined elsewhere; presumably it checks that
# subscriberid x constructed_plan_year uniquely identifies rows -- confirm.
is.identified(all_choice_premiums, c("subscriberid", "constructed_plan_year"))

# * # * # * # * # * # * # * # * # * #
# Cleaning the household level data #
# * # * # * # * # * # * # * # * # * #

# Variables to keep from the household-level data, grouped by role. The groups
# are flattened, in the order listed, into the column selection below.
#
# Two groups are intentionally empty and therefore have no list element
# (assigning an empty c() to a list element creates nothing):
#   - choice_vars: the constructed plan ID itself (only its components are
#     kept, under choice_vars.backup)
#   - moralhazardvars / moralhazardvars.backup: discretionary spending
#     (moral hazard) variables
var_list <- list(
  # IDs: household id, subscriber id, year
  idvars = c("hhid", "subscriberid", "year"),

  # Choice variables: the variables that go into the constructed plan ID
  choice_vars.backup = c(
    "markettype", "exog_exchange", "best_guess_ra", "payer_id",
    "best_guess_metal", "mnc_plantype"),

  # Non-discretionary spending variables:
  # subscriber age, number of kids, number of spouses, mean ACG, max ACG
  ndspendingvars = c(
    "age_bins_label", "withkids", "married",
    "acg_quartiles_label", "acg_max_quartiles_label"),
  ndspendingvars.backup = c(
    "age", "ndeps", "nspouse", "sum_concurrent_risk", "max_concurrent_risk"),

  # Risk aversion variables: income
  riskavervars = c("fpl_bins"),
  riskavervars.backup = c("best_guess_incomeoverFPL"),

  # Cost variables: total cost, total OOP cost
  costvars = c(
    "totpaid", "totcopay", "totcoins", "totdeduct", "numzeroclaims",
    "nummonths_observed", "nummonths_span",
    "totpaidmonth_observed", "totpaidmonth_span"),

  # Chosen plan variables
  premergevars = c(
    "grossprem_old", "best_guess_Subsidy_old", "best_guess_AV_old",
    "best_guess_netprem_old", "best_guess_sumrate")
)

# Keep only the listed columns. FALSE is spelled out because `F` is an ordinary
# variable that can be reassigned.
SubDat <- SubDat[, unlist(var_list), with = FALSE]

# * # * # * # * # * # * # * # * # * # * # * # * # * #
# Creating outside option for each household-year   #
# * # * # * # * # * # * # * # * # * # * # * # * # * #

# Build one "Outside_Option" row per subscriberid-year found in the premium
# data. Every premium column other than the merge keys and the plan identifier
# is set to 0 for that row.
outsideoptions <- all_choice_premiums[, c(
  list(constructed_plan_year = "Outside_Option"),
  lapply(.SD, function(x) 0)),
  by = c("subscriberid", "year"),
  .SDcols = setdiff(
    vars_to_keep_prems,
    c("subscriberid", "year", "constructed_plan_year"))]
# Append the outside-option rows to the list of plan options.
all_choice_premiums <- rbind(all_choice_premiums, outsideoptions)

# Expand to the household-option level: each household row is matched to every
# option available for its subscriberid-year. all.x keeps households that have
# no matching option rows; allow.cartesian permits the one-to-many expansion.
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
ExplodedDat <- merge(
  SubDat,
  all_choice_premiums,
  by = c("subscriberid", "year"),
  allow.cartesian = TRUE,
  all.x = TRUE,
  suffixes = c(".chosen", ".option"))

# * # * # * #
# Kondo!    #
# * # * # * #

# Tidy up: drop the household-side payer and plan-type columns.
ExplodedDat[, c("payer_id", "mnc_plantype") := NULL]

# Rename by reference with setnames() rather than `names(DT)[i] <- ...`, which
# is the data.table-recommended way to rename columns. Unlike the index-based
# assignment, setnames() raises an error if one of the old names is missing.
setnames(
  ExplodedDat,
  old = c("fpl_bins", "markettype", "best_guess_metal"),
  new = c("inc_over_fpl_bins", "markettype_old", "best_guess_metal_old"))

# Read the comma-separated file as character, keep items 2 through 7, and
# convert them to numeric.
acgquinsvec <- as.numeric(scan(file=paste0(project_folder, "/data/orig/acg_positive_quintiles"), sep = ",", what = "character")[2:7])

# ravec and yrvec are expected to be defined before this script runs.
# The ", " separator keeps the ravec values from running into the yrvec label.
print(paste0("Modifying... ravec: ", paste(ravec, collapse = " and "), ", yrvec: ", paste(yrvec, collapse = " and ")))

# NOTE(review): the result is stored under the same name as the function being
# called, so the function is no longer reachable by that name afterwards if it
# lives in this environment. Left as-is because code outside this file may read
# `mod_exploded_data` as the dataset -- confirm before renaming.
# The numeric thresholds are passed through unchanged; their meaning is defined
# by the sourced function.
mod_exploded_data <- mod_exploded_data(
  ExplodedDat, 
  paste0(project_folder, "/data"),
  highmeanthresh = 0.59616,
  lowsumthresh = 0.2575,
  highsumthresh = 1.76329,
  highmaxthresh = 3.37157,
  medincome = 2.746942,
  acgquins = acgquinsvec,
  winsorbound = 89.3707340916662,
  ravec = ravec,
  yrvec = yrvec)

# Remove the intermediate tables that are no longer needed.
rm(ExplodedDat, all_choice_premiums, SubDat)
